The main dataset used in this tutorial is about the geolocalisation of french restaurants in Paris. Data is extracted from an official register called SIRENE (Computer system for the business and establishment register) managed by the French National Institute of Statistics and Economic Studies (Insee) and geolocated by Etalab (French task force for Open Data). This register records the civil status of all companies and their establishments (including restaurants). SIRENE has the advantages of being rigorous and exhaustive on the French territory.
sf::st_read().
Reading layer `iris_75' from data source `/home/tim/Documents/prz/satRday/exercises/data/iris_75.shp' using driver `ESRI Shapefile'
Simple feature collection with 992 features and 4 fields
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: 643075.6 ymin: 6857477 xmax: 661086.2 ymax: 6867081
epsg (SRID): NA
proj4string: +proj=lcc +lat_1=49 +lat_2=44 +lat_0=46.5 +lon_0=3 +x_0=700000 +y_0=6600000 +ellps=GRS80 +units=m +no_defs
plot(iris_75). What do you notice ?
sf::st_geometry() function? What solution do you propose then?
sf::st_geometry() makes it possible to isolate the information contained in the ‘geometry’ column of the sf object. Using it, we put aside other variables (here CODE_IRIS, P14_POP, AREA and CODE_COM).
st_read() and sf::st_geometry().
Reading layer `sir_75' from data source `/home/tim/Documents/prz/satRday/exercises/data/sir_75.shp' using driver `ESRI Shapefile'
Simple feature collection with 23348 features and 7 fields
geometry type: POINT
dimension: XY
bbox: xmin: 643502.1 ymin: 6857645 xmax: 659766.4 ymax: 6867041
epsg (SRID): NA
proj4string: +proj=lcc +lat_1=49 +lat_2=44 +lat_0=46.5 +lon_0=3 +x_0=700000 +y_0=6600000 +ellps=GRS80 +units=m +no_defs
plot(st_geometry(iris_75), bg = "cornsilk", col = "lightblue",
border = "white", lwd = .5)
plot(st_geometry(sir_75), col = "red", pch = 20, cex = .2, add=TRUE)
title("Restaurants in Paris")sf::st_intersects() and sapply().
inter <- st_intersects(x = iris_75, y = sir_75)
iris_75$RESTAU <- sapply(inter, length)
head(iris_75)Simple feature collection with 6 features and 5 fields
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: 650181.1 ymin: 6861761 xmax: 652179 ymax: 6863138
epsg (SRID): NA
proj4string: +proj=lcc +lat_1=49 +lat_2=44 +lat_0=46.5 +lon_0=3 +x_0=700000 +y_0=6600000 +ellps=GRS80 +units=m +no_defs
CODE_IRIS P14_POP AREA CODE_COM geometry RESTAU
1 751010101 974 64006.07 75101 MULTIPOLYGON (((652096.5 68... 44
2 751010102 167 62671.06 75101 MULTIPOLYGON (((652052.7 68... 6
3 751010103 246 47653.41 75101 MULTIPOLYGON (((651869 6862... 16
4 751010104 3 222656.99 75101 MULTIPOLYGON (((651639.9 68... 10
5 751010105 0 243255.23 75101 MULTIPOLYGON (((650369.8 68... 0
6 751010199 0 234277.01 75101 MULTIPOLYGON (((652096.5 68... 0
dplyr package: select, group_by et summarize. These functions also work with sf objects.
library(sf)
library(cartography)
library(dplyr)
# Import data
fra <- st_read("data/fra.shp", quiet = TRUE)
# Create the variable
com_75$nb_rest_10000inhab <- 10000 * com_75$RESTAU / com_75$P14_POP
# Define breaks
bks <- getBreaks(v = com_75$nb_rest_10000inhab, method = "quantile", nclass = 4)
# Define color palette
cols <- carto.pal("wine.pal", length(bks)-1)
# Define plot margins
par(mar = c(0.2, 0.2, 1.4, 0.2), bg = "azure")
# Find EPCI bounding box
bb <- st_bbox(com_75)
# Plot France using EPCI boundingbox
plot(st_geometry(fra), col="ivory", border = "ivory3",
xlim = bb[c(1, 3)], ylim = bb[c(2, 4)])
# Plot the choropleth layer
choroLayer(com_75, var = "nb_rest_10000inhab",
breaks = bks, col = cols, lwd = 0.5,
legend.pos = "topleft",add = TRUE, border = "white",
legend.title.txt = "Number of restaurants\nfor 10,000 inhabitants")
# Plot proportionnal symbols
propSymbolsLayer(com_75, var="RESTAU", col="#ffffff80",border = "grey20",
legend.pos="left", inches=0.3, add = TRUE, lwd = 1,
legend.title.txt = "Number of restaurants")
# Add a layout layer
layoutLayer(title = "Restaurants", sources = "Insee, 2018",
author = "Kim & Tim, 2018", tabtitle = TRUE,
theme = "green.pal", col = "darkred",
coltitle = "white",
frame = TRUE, scale = 2)
# Add a north (south) arrow
north(pos = "topright", south = TRUE)We would like to design a map of Paris arrondissments that combines the number of restaurants and the number of restaurants per 10,000 inhabitants.
Data preparation:
getBreaks et carto.pal functions of the cartography package. For the creation of the typo variable, you can use the cut function and apply the parameters digit.lab = 2 and include.lowest = TRUE.
library(sf)
library(cartography)
# Import data
com_75 <- st_read("data/com_75.shp", quiet = TRUE)
# Create the variable
com_75$rest_per_10k <- 10000 * com_75$RESTAU / com_75$P14_POP
# Define breaks
bks <- getBreaks(v = com_75$rest_per_10k, method = "quantile", nclass = 4)
# Define color palette
# display.carto.all(n = 4)
cols <- carto.pal("wine.pal", length(bks)-1)
# For ggplot2 maps - Create a "typo"" variable
library(dplyr)
com_75 <- com_75 %>%
mutate(typo = cut(rest_per_10k, breaks = bks, dig.lab = 2,
include.lowest = TRUE))cartography package, make the following map which contains in a choropleth layer the variable rest_per_10k and in a proportional circle layer the variable RESTAU. You can also try to do the same map using ggplot2 and tmap packages.
cartography# Define plot margins
par(mar = c(0, 0, 1.2, 0), bg = "cornsilk")
# Plot the choropleth layer
choroLayer(com_75, var = "rest_per_10k",
breaks = bks, col = cols,
border = "grey", lwd = 0.5,
legend.pos = "topright", legend.horiz = T,
legend.title.txt = "Number of restaurants\nper 10,000 inhabitants")
# Plot proportionnal symbols
propSymbolsLayer(com_75, var="RESTAU",
inches=0.25, lwd = 1,
col="#ffffff90", border = "grey20",
legend.pos="right",
legend.title.txt = "Number of restaurants")
# Add a layout layer
layoutLayer(title = "Restaurants Distribution in Paris",
sources = "Insee & SIRENE, 2018",
author = "Kim, Tim & Comeetie, 2019",
tabtitle = TRUE,
col = "darkred", coltitle = "white",
frame = FALSE, scale = 2)
# Add a north (south) arrow
north(pos = "topleft")ggplot2library(ggplot2)
map_ggplot <- ggplot() +
geom_sf(data = com_75, colour = "ivory3",
fill = "ivory") +
geom_sf(data = com_75, aes(fill = typo), colour = "grey80") +
scale_fill_manual(name = "Number of restaurants\nper 10,000 inhabitants",
values = cols, na.value = "#303030")+
geom_sf(data = com_75 %>% st_centroid(),
aes(size= RESTAU), col="#f5f5f5",show.legend = 'point') +
scale_size_area(max_size = 20, name = "number of restaurants" )+
coord_sf(crs = 2154, datum = NA,
xlim = st_bbox(com_75)[c(1,3)],
ylim = st_bbox(com_75)[c(2,4)]
) +
theme_minimal() +
theme(panel.background = element_rect(fill = "azure",color=NA)) +
labs(
title = "Restaurants Distribution in Paris",
caption = "Insee & SIRENE, 2018\nKim, Tim & Comeetie, 2019"
)
plot(map_ggplot)tmaplibrary(tmap)
tm_shape(com_75) +
tm_polygons("rest_per_10k",
breaks = bks,
palette=cols, title = "Number of restaurants\nper 10,000 inhabitants") +
tm_symbols(size = "RESTAU", col = "white",
alpha = 0.5, scale = 3,
title.size = "Number of Restaurants") +
tm_layout(title = "Restaurants Distribution in Paris",
title.position = c("left", "top"),
legend.position = c("right", "top"),
frame = T,
inner.margins = c(0, 0, 0.1, 0)) +
tm_compass(type = "arrow", position = c("left", "top")) +
tm_scale_bar(position = c("left", "bottom"), breaks = c(0,1,2)) +
tm_credits("Insee & SIRENE, 2018
Kim, Tim & Comeetie, 2019", position = c("left", "bottom"))In French, IRIS is an acronym of ‘aggregated units for statistical information’. Their target sizes are 2000 residents per basic unit.↩